


#######################################################################################################
##### calculate partial ROC for models ################################################################
#######################################################################################################

##########
#Settings#
##########

#load packages:
library(R.utils)
library(terra)

#basic model settings
#set project name:
projectName<-"WHOSnakes"

#define directories
MyDir<-paste("/home/jc217070/Maxent",sep="")
SWDDir<-paste(MyDir, "/Maxent_SWDs/spp_swds",sep="")
OutDirFinal<-paste(MyDir,"/Outputs3_Final/permutation.importance",sep="")
PlotDir<-paste(MyDir,"/Maxent_Data/Plots/Plots_WHOSnakes_2020_09_01",sep="")
dir.create(path=file.path(MyDir,"tmp.pbs"),showWarnings = FALSE)
PbsDir <- paste(MyDir, "/tmp.pbs", sep="")
setwd(PbsDir)

#read in lookup tables
DatSum<-read.table(file=file.path(paste(MyDir, "/Maxent_LUTables/",projectName,".csv",sep="")), header=TRUE, sep=",") #load data summary for all species
spp<-DatSum$gen_sp_subtax
mapspp<-unique(DatSum$ModUnit[DatSum$Listing_comment!="not listed"])
mapspp<-paste("ModUnit_", gsub(" ","_",mapspp), sep="")

#source("/home/jc217070/Maxent/R_scripts/partialROC.R")
its<-39






  
partialROC <- function(raster, points, p.points = 0.5, iterations = 39,
                       buf = NULL, omission = 0.01, save.pdf = T, n.thresh=100, pdf.name = "PartialROC"){
  
  require(terra)
  
  tmpFiles(current=TRUE, orphan=FALSE, old=FALSE, remove=TRUE)
  
  print(paste(Sys.time(),"starting ROC function"))
  
  ZeroOneNorm <- function(x){
    require(terra)
    mi <- global(x, min, na.rm = T)[, 1]
    ma <- global(x, max, na.rm = T)[, 1]
    x.norm <- (x - mi)/(ma - mi)
  }
  
  if(!is.null(buf)){
    p <- vect(as.matrix(points[, -3]))
    bu <- buffer(p, width = buf)
    
    bur <- rasterize(bu, raster)
    
    raster <- mask(raster, bur)
  }
  
  
  
  raster <- log(raster + 0.1)
  
  if(omission > 0){
    vals <- terra::extract(raster, points[, 1:2])[,2]
    q <- quantile(vals, omission, na.rm = T)
    om.r <- raster > q
    om.r <- classify(om.r, rcl = matrix(c(-Inf, 0, NA), ncol = 3, byrow = T))
    raster <- mask(raster, om.r)
    
    points <- points[vals > q, ]
  }
  
  print(paste(Sys.time(),"normalizing suitability layer"))
  
  r <- ZeroOneNorm(raster)
  r <- round(r, 2)
  
  thres <- seq(0, 1, len = n.thresh+1)
  
  print(paste(Sys.time(),"thresholding suitability layer"))
  
  #Thresholding suitability layer
  r.thr <- r >= thres
  area.pred <- global(r.thr, mean, na.rm = T)$mean
  dArea <- area.pred[1:n.thresh] - area.pred[2:(n.thresh+1)]
  
  print(paste(Sys.time(),"converting thresholded rasters to dataframe"))
  
  points.r <- as.data.frame(r.thr, xy = T)[, c("x", "y")]
  
  mp <- matrix(0, nrow = iterations, ncol = length(area.pred))
  mr <- matrix(0, nrow = iterations, ncol = length(area.pred))
  
  areas <- matrix(0, nrow = iterations, ncol = 4)
  colnames(areas) <- c("iteration", 
                       "Area.pres",
                       "Area.rand",
                       "PartialROC")
  
  print(paste(Sys.time(),"running", iterations, "iterations"))
  
  for(i in 1:iterations){
    
    print(paste(Sys.time(), "iteration", i))
    
    samp.pres <- sample(1:nrow(points), size = nrow(points) * p.points, replace = F)
    samp.rand <- sample(1:nrow(points.r), size = nrow(points) * p.points, replace = F)
    
    pres.thr <- terra::extract(r.thr, points[samp.pres, 1:2])[, -1]
    rand.thr <- terra::extract(r.thr, points.r[samp.rand, ])[, -1]
    
    
    #Calculating proportion of predicted points  
    means.pres <- colMeans(pres.thr)
    means.rand <- colMeans(rand.thr)
    
    #Calculating areas
    
    rects.pres <- dArea * means.pres[-(n.thresh+1)]
    rects.rand <- dArea * means.rand[-(n.thresh+1)]
    
    Area.pres <- sum(rects.pres)
    Area.rand <- sum(rects.rand)
    Area.ratio <- Area.pres / Area.rand
    
    areas[i, ] <- c(iteration = i, 
                    Area.pres = Area.pres,
                    Area.rand = Area.rand,
                    PartialROC = Area.ratio)
    mp[i, ] <- means.pres
    mr[i, ] <- means.rand
  }
  
  areas  <- as.data.frame(areas)
  
  rat <- round(mean(areas$PartialROC), 2)
  P <- with(areas, length(which(PartialROC < 1))/iterations)
  
  print(paste(Sys.time(),"making PDF"))
  
  
  if(save.pdf){
    pdf(paste0(pdf.name, ".pdf"), width = 5, height = 5)
    plot(area.pred, colMeans(mp), xlab = "% Area predicted", 
         ylab = "1 - Omission error", col = "grey95", type = "l", 
         xlim = c(0, 1), ylim = c(0, 1), main = paste0("AUC ratio = ", rat, "\n P = ", P))
    lines(area.pred, colMeans(mr), col = "grey95", lty = 2, type = "l")
    for(j in 1:iterations){
      lines(area.pred, mp[j, ], col = "grey95", lwd = 0.25, type = "l")
      lines(area.pred, mr[j, ], col = "grey95", lty = 2, lwd = 0.25, type = "l")
    }
    lines(area.pred, colMeans(mp, na.rm = T), col = "red", lwd = 1.5, type = "s")
    lines(area.pred, colMeans(mr, na.rm = T), type = "s")
    dev.off()
  }
  
  areas <- as.data.frame(areas)
  
  print(paste(Sys.time(),"finished ROC function"))
  
  return(areas)
}

pROCs<-vector()
P_pROCs<-vector()
fin_spp<-vector()

for (sp in mapsppB){
  
  myfile<-paste(OutDirFinal,"/",sp,"/", sp,"_CurrentCrop.asc",sep="")
  if(file.exists(paste(myfile,".gz", sep=""))){
    
    tmpFiles(current=TRUE, orphan=FALSE, old=FALSE, remove=TRUE)
    
    print(paste(Sys.time(),"getting partial ROCs for",sp))
    
    gunzip(filename=paste(myfile,".gz", sep=""), #unzip model
           destname=paste(myfile, sep=""),
           remove=FALSE, overwrite=TRUE)
    myraster<-rast(myfile)
    print(myraster)
    size<-as.numeric(file.info(myfile)[1])/1000000000
    print(paste("raster file size=", size,"GB"))
    
    myts<-(1/(size*0.025))
    myts<-ifelse(myts>100,100,myts)
    
    mypoints<-read.table(paste(SWDDir,"/",sp,".csv",sep=""), header=TRUE, sep=",")
    mypoints<-mypoints[,2:3]
    
    print(summary(mypoints))
    
    myts2<-(1/((length(mypoints[1,])/1000)*0.025))
    
    myts<-min(myts,myts2)
    myts<-trunc(myts)
    
    print(paste("using",myts,"thresholds"))
    
    ROCresults<-partialROC(raster=myraster, points=mypoints, iterations = its, n.thresh = myts,
                           pdf.name = paste(PlotDir,"/PartialROC_",sp,sep=""))
    write.table(x=ROCresults, file=paste(PlotDir,"/PartialROC_",sp,".csv",sep=""),   
                na = "NA", row.names = FALSE, col.names = TRUE, sep=",")
    
    pROC<-paste(mean(na.omit(ROCresults$PartialROC)))
    pROCs<-c(pROCs,pROC)
    P_pROC <- paste(length(which(ROCresults$PartialROC < 1))/its)
    P_pROCs<-c(P_pROCs,P_pROC)
    fin_spp<-c(fin_spp,sp)
    P_pROCs_df<-cbind(fin_spp,pROCs,P_pROCs)
    names(P_pROCs_df)<-c("MU","partial_ROC","p_value")
    write.table(x=P_pROCs_df, file=out.csv,   
                na = "NA", row.names = FALSE, col.names = TRUE, sep=",")
    
    file.remove(myfile)
  }
}







